#共青团赣州市委监制
from bs4 import BeautifulSoup
import requests
import time
import random

# Log when the run started.
print("Start time : %s" % time.ctime())

# Raw login cookie string for weibo.cn. The value here is truncated on
# purpose so the account cannot be hijacked from the source; to prompt for it
# at runtime instead, assign cookieDATA from input() here.
cookieDATA = (
    "_T_WM=2da1; SCF=9xfh94.; SUB=yfQP; SUHB=33D; "
    "SSOLoginState=1550; M_WEIBOCN_PARAMS=74"
)
# Mapping handed to requests on every fetch further down in the script.
cookie = dict(Cookie=cookieDATA)

# Banner only: `url` is rebuilt for each uid once crawling starts.
url = 'http://weibo.cn/u/1064063781'
print(url, 'wbSpider准备就绪', sep='\n')

# uids of the Weibo accounts to crawl, one per line in uiddata.txt.
uiddata = []
# 'utf-8-sig' decodes plain UTF-8 as usual but also drops a leading BOM, which
# would otherwise end up glued to the first uid and corrupt its URL.
with open('uiddata.txt', 'r', encoding='utf-8-sig', errors='ignore') as f:
    for raw_line in f:
        uid = raw_line.strip()
        # Skip blank lines (e.g. a trailing empty line) so no request is ever
        # made with an empty uid.
        if uid:
            uiddata.append(uid)


# Crawl every uid in `uiddata`: read the account name and counters from the
# profile page, walk the paginated post list, then write everything collected
# for that account to wbdata<uid>.txt.
for user_id in uiddata:
    url = 'http://weibo.cn/u/%s' % user_id
    web_data_pre = requests.get(url, cookies=cookie)
    web_data_pre.encoding = 'utf-8'
    soup_pre = BeautifulSoup(web_data_pre.text, 'lxml')

    # The account name is taken from the second '.ctt' element. If the page
    # does not have one, skip this uid instead of letting an IndexError abort
    # the crawl for every remaining account.
    name_nodes = soup_pre.select('.ctt')
    if len(name_nodes) < 2:
        print('未能解析微博名称，跳过：', user_id)
        continue

    result = []
    weibo_name = name_nodes[1].text
    print('好了，我要开始看看：', weibo_name)
    result.append(weibo_name)

    # Header counters: total posts, following count and fan count.
    for tip2 in soup_pre.select('.tip2'):
        weibo_total = tip2.select('.tc')[0].text
        links = tip2.select('a')
        weibo_follow = links[0].text
        weibo_fans = links[1].text
        result.append(weibo_total)
        result.append(weibo_follow)
        result.append(weibo_fans)
        print(weibo_total, weibo_follow, weibo_fans)
    result.append('\n')

    # range(1, 50) visits pages 1 through 49 of the account's post list.
    for page in range(1, 50):
        url = 'http://weibo.cn/u/%s?page=%d' % (user_id, page)
        web_data = requests.get(url, cookies=cookie)
        web_data.encoding = 'utf-8'
        soup = BeautifulSoup(web_data.text, 'lxml')

        for c in soup.select('.c'):
            # Query each selector once per entry rather than on every test.
            cmt_nodes = c.select('.cmt')
            ctt_nodes = c.select('.ctt')
            divs = c.select('div')

            if len(cmt_nodes) > 3:
                # The original author treats more than three '.cmt' elements
                # as a repost: record its source, then the repost text.
                result.append(cmt_nodes[0].text)
                if len(divs) > 2:
                    result.append(divs[2].text)
                elif len(divs) > 1:
                    result.append(divs[1].text)
                result.append('\n')
            elif ctt_nodes:
                if len(divs) > 1:
                    # Original post: content, then the publish-time line.
                    result.append(ctt_nodes[0].text)
                    result.append(divs[1].text)
                elif divs:
                    # Single-div entry: post text and time info together.
                    result.append(divs[0].text)
                result.append('\n')
            print(c)

        # Random pause of up to 15 seconds to throttle the crawl rate.
        time.sleep(15 * random.random())

    print("主人别急，我先歇会儿O(∩_∩)O")
    # The context manager closes (and flushes) the file for each account; the
    # handle is no longer leaked across iterations.
    with open("wbdata%s.txt" % user_id, "w", encoding='utf-8') as fo:
        fo.write('\n'.join(result))
    # Wait two minutes before starting on the next account.
    time.sleep(120)

# Closing banner: completion message followed by the finish timestamp.
print('全部执行完毕', "End time : %s" % time.ctime(), sep='\n')